// NationscapeUnemployment

* Close any log left open by an earlier, interrupted run; otherwise log using
* stops with "log file already open". capture suppresses the error when no log is open.
capture log close
log using NationscapeUnemployment.log, replace
// Original congressional district-level 2019 unemployment data imported from https://data.census.gov/table/ACSST1Y2019.S2301?q=S2301&g=010XX00US$5000000&y=2019
* The clear option lets the script start even when another dataset is in memory
use "C:\Users\sbstjp\OneDrive - Cardiff University\2019ConDisUnempData.dta", clear

* Retain only the identifiers and the three ACS estimates used further down
keep geo_id name s2301_c01_028e s2301_c04_001e s2301_c04_013e

* Per the original author's note, the 437 observations appear twice in the
* file; rows 438-874 are the second copy and are discarded by position
drop in 438/874

* Tag every remaining row as 2019 and place the tag just before the estimates
generate Year = 2019
order Year, before(s2301_c01_028e)

* Stack the 2021 file underneath (it is assumed to have been prepared the same
* way and to carry its own Year values - confirm against that file)
append using "C:\Users\sbstjp\OneDrive - Cardiff University\2021ConDisUnempData.dta"

* s2301_c04_013e is stored as text and contains the code "N" in some rows.
* Turn "N" into the missing-value marker so the column can be made numeric.
replace s2301_c04_013e = "." if s2301_c04_013e == "N"
destring s2301_c04_013e, replace

* Percentage change from 2019 to 2021 for each estimate, computed within
* district (geo_id). bysort orders each district by Year, so on the 2019 row
* the next row [_n+1] is that district's 2021 row. The extra Year[_n+1] check
* leaves the change missing when the following row is not a 2021 row
* (for example a district that is absent from the 2021 file).
foreach var in s2301_c01_028e s2301_c04_001e s2301_c04_013e {
    bysort geo_id (Year): gen pct_change_`var' = 100 * (`var'[_n+1] - `var') / `var' ///
        if Year == 2019 & Year[_n+1] == 2021
}

* The 2021 rows were needed only to compute the change; keep one row per district
drop if Year == 2021
drop Year

// Generate a variable which can be matched with Voter Survey
* The last four characters of geo_id are read as a 2-character state FIPS code
* followed by a 2-character district number
gen congress_district = substr(geo_id, -4, 4)
gen state_fips = substr(congress_district, 1, 2)

* State FIPS code / postal abbreviation pairs (50 states plus DC and Puerto Rico)
local fips_pairs ///
    01 AL 02 AK 04 AZ 05 AR 06 CA 08 CO 09 CT 10 DE 11 DC 12 FL ///
    13 GA 15 HI 16 ID 17 IL 18 IN 19 IA 20 KS 21 KY 22 LA 23 ME ///
    24 MD 25 MA 26 MI 27 MN 28 MS 29 MO 30 MT 31 NE 32 NV 33 NH ///
    34 NJ 35 NM 36 NY 37 NC 38 ND 39 OH 40 OK 41 OR 42 PA 44 RI ///
    45 SC 46 SD 47 TN 48 TX 49 UT 50 VT 51 VA 53 WA 54 WV 55 WI ///
    56 WY 72 PR

* Walk the list two tokens at a time: odd positions are codes, even positions abbreviations
gen state_abbrev = ""
local n_tokens : word count `fips_pairs'
forvalues i = 1(2)`n_tokens' {
    local j = `i' + 1
    local code : word `i' of `fips_pairs'
    local abbr : word `j' of `fips_pairs'
    quietly replace state_abbrev = "`abbr'" if state_fips == "`code'"
}

* Combine state abbreviation with congressional district number
gen formatted_district = state_abbrev + substr(congress_district, 3, 2)

* For the later merge the District of Columbia and Puerto Rico must be keyed as
* DC00 and PR00. They are selected by FIPS code (11 and 72) rather than by row
* number, so the result does not depend on the sort order or on how many rows
* precede them.
replace formatted_district = "DC00" if state_fips == "11"
replace formatted_district = "PR00" if state_fips == "72"

* Remove rows that are not a recognised state/DC/PR district. This covers the
* empty leading observation without relying on it being in row 1.
drop if state_abbrev == ""

drop congress_district state_fips state_abbrev
rename formatted_district congress_district

* The key is used on the using side of an m:1 merge, so it must be unique
isid congress_district

* Save the district-level file. The replace option allows the do-file to be
* rerun; without it save stops with "file already exists" on every run after the first.
save "C:\Users\sbstjp\OneDrive - Cardiff University\RevisedConDisUnempData.dta", replace

clear

// Now work with Nationscape
// Source: Tausanovitch, Chris and Lynn Vavreck. 2020. Democracy Fund + UCLA Nationscape, October 10-17, 2019 (version 20200814). Retrieved from [URL].  Date accessed: March 09, 2025.
use "C:\Users\sbstjp\OneDrive - Cardiff University\SingleNationscape\Nationscapenewclean.dta"

* Only the survey weight, the four scale items, the district key and the interview date are needed
keep weight statements_gender_identity reparations group_favorability_blm group_favorability_undocumented congress_district start_date

// Social justice scale
* Shorter working names for three of the items
rename (group_favorability_blm group_favorability_undocumented statements_gender_identity) ///
       (blm undocumented genderidentity)

* Set out-of-range codes to missing: anything above 4 for genderidentity and
* blm, anything above 2 for reparations, and the code 999 for undocumented
foreach item in genderidentity blm {
    replace `item' = . if `item' > 4
}
replace reparations = . if reparations > 2
replace undocumented = . if undocumented == 999

* Reverse-code three items so that social justice is coded high.
* The largest observed value of each item is used as the top of its scale.
foreach var in blm reparations undocumented {
    quietly summarize `var', meanonly
    gen r`var' = r(max) + 1 - `var'
}

* Rescale every item to run from 1 (observed minimum) to 2 (observed maximum)
* so that items with different numbers of categories contribute equally
foreach var in rblm rreparations rundocumented genderidentity {
    quietly summarize `var', meanonly
    gen s`var' = 1 + (`var' - r(min)) / (r(max) - r(min))
}

* Scale score = mean of the items a respondent answered.
* The egen row functions skip missing items, so the rescaled items keep their
* missing values instead of being overwritten with 0:
*   total_scoreSJV   - sum of the answered items (0 when none was answered)
*   count_nonzeroSJV - number of answered items (name kept from the earlier version)
*   SocJusValues     - total / count; missing when no item was answered
local sjv_items srblm srreparations srundocumented sgenderidentity
egen total_scoreSJV = rowtotal(`sjv_items')
egen count_nonzeroSJV = rownonmiss(`sjv_items')
egen SocJusValues = rowmean(`sjv_items')

* Attach the district-level ACS indicators to each respondent. ACS data were
* unavailable for 2020, the year in which Nationscape was fielded, so the file
* holds the % change between 2019 and 2021.
* The file merged here is the one saved earlier in this do-file. The previous
* version pointed at CongressionalUnemployment.dta, which this script never
* creates. NOTE(review): if that file is a deliberately maintained copy,
* restore the old path.
merge m:1 congress_district using "C:\Users\sbstjp\OneDrive - Cardiff University\RevisedConDisUnempData.dta"

*Rename some variables
rename pct_change_s2301_c04_001e totalunempratechange // % change in total unemployment rate, 2019-21
rename pct_change_s2301_c04_013e blackunempchange // % change in Black unemployment rate, 2019-21
* NOTE(review): column C01 of table S2301 appears to be the population count, not
* a percentage, so this is presumably the % change in the NUMBER of people below
* the poverty line in the last 12 months - confirm against the table definition
rename pct_change_s2301_c01_028e belowpovlineinlast12mnths

* Restrict the sample to interviews started in 2020.
* start_date is converted from a datetime to a daily date, then to a calendar year.
gen date = dofc(start_date)
gen year = year(date)
drop if year != 2020

// Exploratory analysis
* Weighted pairwise correlations, with significance levels, between the scale
* and the three district-level change measures
local change_measures totalunempratechange belowpovlineinlast12mnths blackunempchange
pwcorr SocJusValues `change_measures' [aweight=weight], sig

log close